* Session setup: clear memory, turn off output paging, and set the matrix-size
* and variable-count limits used for the rest of this script.
clear all
set more off
set matsize 10000
set maxvar 10000
* Add the gslab_tools directory to the ado-path so its commands can be found.
adopath + ../code/gslab_tools/

* NOTE(review): preliminaries is not defined in this file; presumably it is
* provided by the gslab_tools directory added above. Confirm what doutf() does.
preliminaries, doutf(../derived/Combined)
* Font face used in graph windows.
graph set window fontface "Times New Roman"


/******
Combine Full Data
****/

* Antidepressant arms: attach the drug-combination id, keep the analysis
* variables, and rename the response-share outcomes to the generic outcome
* names that the antipsychotic subset already uses.
use "../derived/Antidepressants/antidepressant.dta", clear
merge m:1 study_no using "../derived/Antidepressants/antidepressant_drug_combos.dta", ///
	keepusing(drug_combo_no) nogen

* Variables carried forward, grouped by theme (keep does not reorder columns).
local ad_design studyname studyid study_no drug_combo_no sample year lengthoftrial ///
	recruitment treatmentsetting patientstatus multisinglecenter placeborun
local ad_arm drug addl_drug drug_type firm sponsor* approve_year patent_year ///
	dose_min no_randomised dropouts_total share_female mean_age
local ad_baseline scale meanbaselineseverity baselineseverityscale
local ad_outcome stdz_* best_* z_* sig* share_respond* ///
	effect_perc effect_perc_relative effect effect_relative
keep `ad_design' `ad_arm' `ad_baseline' `ad_outcome'

* Response-share outcome names mapped to the generic outcome names.
rename (stdz_share_respond stdz_share_respond_relative best_share_respond ///
	z_sharerespond sig5_sharerespond sig10_sharerespond) ///
	(stdz_outcome_all stdz_outcome_relative_all best_outcome ///
	z_outcome sig5_outcome sig10_outcome)

* Percent-effect versions of the derived statistics are not carried forward.
drop best_effect_perc z_pereffect sig5_pereffect sig10_pereffect stdz_effect_perc_relative

save ../derived/Combined/antidepressant_subset.dta, replace

* Antipsychotic arms: attach the drug-combination id, keep the analysis
* variables, and create the design variables that exist only in the
* antidepressant subset so the two files can be appended.
use "../derived/Antipsychotics/antipsychotic.dta", clear
merge m:1 study_no using "../derived/Antipsychotics/antipsychotic_drug_combos.dta", ///
	keepusing(drug_combo_no) nogen

* Variables carried forward, grouped by theme (keep does not reorder columns).
local ap_design studyname study_no drug_combo_no sample year lengthoftrial scale
local ap_arm drug addl_drug drug_type firm sponsor* approve_year patent_year ///
	dose_min no_randomised dropouts_total share_female mean_age meanbaselineseverity
local ap_outcome stdz_outcome_all stdz_outcome_relative_all best_outcome z_outcome ///
	sig5_outcome sig10_outcome outcome_all efficacy_change_PANSS_mean ///
	efficacy_PANSS_relative efficacy_perc*
keep `ap_design' `ap_arm' `ap_outcome'

* The baseline-severity scale is set equal to the outcome scale.
generate baselineseverityscale = scale
* Design fields filled with fixed placeholder text.
foreach design_var in multisinglecenter treatmentsetting patientstatus {
	generate `design_var' = "Unclear"
}
generate recruitment = "Unspecified"
generate placeborun = "Unclear"

rename efficacy_perc effect_perc
save ../derived/Combined/antipsychotic_subset.dta, replace

* Stack the antidepressant and antipsychotic subsets into one arm-level file.
use ../derived/Combined/antidepressant_subset.dta, clear
append using ../derived/Combined/antipsychotic_subset.dta

* Rebuild the combination and study ids as groups of (original id, sample).
foreach id_var in drug_combo_no study_no {
	rename `id_var' `id_var'_temp
	egen `id_var' = group(`id_var'_temp sample)
	drop `id_var'_temp
}
egen factor_combo = group(drug drug_combo_no)

* Numeric group ids for the categorical trial characteristics.
egen scale_group   = group(scale sample)
egen drug_group    = group(drug)
egen length_group  = group(lengthoftrial)
egen recruit_group = group(recruit sample)
egen setting_group = group(treatmentsetting sample)
egen status_group  = group(patientstatus sample)
egen center_group  = group(multi sample)
egen placebo_group = group(placeborun sample)

* Where an arm has a missing value, fill it with the smallest non-missing
* value found among the arms of the same study.
foreach study_var of varlist scale_group length_group recruit_group setting_group ///
	status_group center_group lengthoftrial placebo_group {
	bysort studyname study_no: egen `study_var'_temp = min(`study_var')
	replace `study_var' = `study_var'_temp if `study_var'==.
	drop `study_var'_temp
}
* Scales still missing after the fill are assigned code 99.
replace scale_group = 99 if scale_group==.

* Arm size: detailed summary, then deciles of the number randomised.
summarize no_random, detail
xtile no_random_group = no_random, nq(10)

* Year bins. Code 1 = year missing, code 2 = 1960 or earlier, and the highest
* code = after 2010. The codes in between are consecutive windows that are
* open on the left and closed on the right.

* Five-year windows: codes 3 to 12 cover (1960,1965] through (2005,2010].
generate year_group = 1 if year==.
replace year_group = 2 if year <=1960
forvalues bin = 3/12 {
	local lo = 1960 + 5*(`bin' - 3)
	local hi = `lo' + 5
	replace year_group = `bin' if year>`lo' & year<=`hi'
}
replace year_group = 13 if year>2010 & year !=.

* Ten-year windows: codes 3 to 7 cover (1960,1970] through (2000,2010].
generate year_group_alt = 1 if year==.
replace year_group_alt = 2 if year <=1960
forvalues bin = 3/7 {
	local lo = 1960 + 10*(`bin' - 3)
	local hi = `lo' + 10
	replace year_group_alt = `bin' if year>`lo' & year<=`hi'
}
replace year_group_alt = 8 if year>2010 & year !=.

* Dropouts as a percentage of the number randomised.
generate dropout_share = dropouts_total * 100 / no_randomised

* Collapse alternative spellings of the baseline-severity scale names.
replace baselineseverityscale = "HAMD" if baselineseverityscale == "HAMD unspecified"
replace baselineseverityscale = "HAMD 17" if inlist(baselineseverityscale, "HAMD17", "HAMD  17")
replace baselineseverityscale = "HAMD 21" if inlist(baselineseverityscale, "HAMD  21", "HAMD21")
replace baselineseverityscale = "HAMD 24" if baselineseverityscale == "HAMD24"
replace baselineseverityscale = "MADRS" if baselineseverityscale == "MADRIS"

* Standardise mean baseline severity within each listed scale: subtract the
* scale's mean and divide by its standard deviation. Arms on a scale that is
* not listed keep a missing value.
generate stdz_baseline = .
foreach sev_scale in "HAMD 17" "HAMD 21" "PANSS" "MADRS" "HAMD" "BPRS" "HAMD 24" "CGI" {
	summarize meanbaselineseverity if baselineseverityscale == "`sev_scale'"
	replace stdz_baseline = (meanbaselineseverity - `r(mean)') / `r(sd)' ///
		if baselineseverityscale == "`sev_scale'"
}

* Indicator: the baseline scale is one of the listed scales.
generate standard_scale = inlist(baselineseverityscale, "BPRS", "CGI", "HAMD", ///
	"HAMD 17", "HAMD 21", "HAMD 24", "HAMD 29", "HAMD 31", "PANSS")
* Indicator: the sample variable equals the string anti.
generate anti_sample = sample == "anti"
generate percent_female = share_female * 100

* Indicator: trial year is after the approval year, with both years recorded.
generate post_approval = (year > approve_year) & (year != .) & (approve_year != .)
* Row number at this point in the script, used as an arm identifier.
generate unique_id = _n

* Registry id: the 11 characters starting at the first occurrence of NCT in
* the study name, falling back to the study id when the name has none.
generate nct_id = substr(studyname, strpos(studyname, "NCT"), 11)
replace nct_id = substr(studyid, strpos(studyid, "NCT"), 11) if nct_id == ""
generate linked = nct_id != ""

* Years relative to approval, plus a group id that also numbers missing values.
generate rel_yr = year - approve_year
egen rel_yr_group = group(rel_yr), missing
generate arm_no = _n

* Drop arms with neither outcome measure, then count the remaining arms per study name.
drop if share_respond==. & outcome_all==.
bysort studyname: generate num_drugs = _N
save ../derived/Combined/combined_arm_level.dta, replace



***
** Create variation metric ***
** Excludes unpublished papers
***
use study_no studyname drug sponsor drug_combo_no year ///
	using ../derived/Combined/combined_arm_level.dta, clear

* Mean of sponsor for each drug within its drug combination, computed only
* over arms with a recorded year; arms without a year get a missing mean.
bysort drug_combo_no drug: egen mean_sponsor = mean(sponsor) if year!=.
* A drug shows variation when that mean is neither 0, nor 1, nor missing.
generate variation_drug = !(mean_sponsor==1 | mean_sponsor==0 | mean_sponsor==.)
* Flag the whole drug combination when any of its drugs shows variation.
bysort drug_combo_no: egen variation_combo = max(variation_drug)
collapse (first) variation_combo mean_sponsor, by(study_no studyname drug drug_combo_no)

* Study-level helpers: number of distinct drugs, whether a placebo arm is
* present, and whether any drug has a sponsorship mean of exactly 1 or 0.
bysort studyname: generate unique_drug = _N
generate placebo_temp = drug=="placebo"
bysort studyname: egen placebo_control = max(placebo_temp)
generate always_sponsor_temp = mean_sponsor==1
bysort studyname: egen always_sponsor = max(always_sponsor_temp)
generate never_sponsor_temp = mean_sponsor==0
bysort studyname: egen never_sponsor = max(never_sponsor_temp)

* Classify each study by its design.
local two_active unique_drug==2 & placebo_control==0
generate active_placebo_combo = unique_drug==2 & placebo_control==1
generate active_active_never_combo = `two_active' & never_sponsor
generate active_active_always_combo = `two_active' & always_sponsor
generate active_active_both_combo = `two_active' & (never_sponsor==0 & always_sponsor==0)
generate three_drugs_combo = unique_drug>2

drop unique* placebo* always* never*
save ../derived/Combined/variation_drug.dta, replace

* Reduce to one row per distinct combination of ids and flags.
keep study_no studyname *combo drug_combo_no
duplicates drop
save ../derived/Combined/variation_combo.dta, replace

/*******
Combine Full Data Long
******/

* Attach the combination flags to the arm-level file and overwrite it.
* assert(3) stops the script unless every observation is matched.
use ../derived/Combined/combined_arm_level.dta, clear
merge m:1 study_no studyname using ../derived/Combined/variation_combo.dta, ///
	assert(3) keep(3) keepusing(*combo) nogenerate
save ../derived/Combined/combined_arm_level.dta, replace


/*******
Combine Full Data Wide
******/

* Build the study-level wide file: one row per study, with each arm-level
* variable suffixed by the position (drug_id) of the arm within its study.
use ../derived/Combined/combined_arm_level.dta, clear
keep study_no drug_combo_no studyname drug sponsor* sig*_outcome stdz_outcome* ///
	approve_year patent_year firm drug_type no_randomised sample standard_scale ///
	scale_group anti_sample studyid ///
	setting_group status_group center_group placebo_group year_group no_random_group ///
	recruit_group lengthoftrial stdz_baseline dropout_share dose_min ///
	mean_age share_female percent_female post_approval year unique_id efficacy_change_PANSS_mean ///
	share_respond best_outcome z_outcome effect_perc effect linked ///
	arm_no

* Number the arms within each study in order of drug name. Placebo is renamed
* zplacebo while sorting so that it comes after drug names that sort before
* zplacebo (presumably all of them); unique_id breaks ties between arms that
* share a drug name, so the numbering is fully determined.
replace drug="zplacebo" if drug=="placebo"
bys study_no (drug unique_id): gen drug_id=_n
replace drug="placebo" if drug=="zplacebo"
reshape wide drug sponsor* sig*_outcome stdz_outcome* ///
	approve_year patent_year firm drug_type no_randomised ///
	no_random_group stdz_baseline dropout_share dose_min arm_no ///
	mean_age share_female percent_female post_approval unique_id efficacy_change_PANSS_mean ///
	share_respond effect_perc best_outcome z_outcome effect, ///
	i(study_no year year_group scale_group standard_scale studyname drug_combo_no ///
	anti_sample lengthoftrial linked) j(drug_id)

* Re-attach the combination flags; assert(3) requires every study to match.
merge 1:1 study_no studyname using ../derived/Combined/variation_combo.dta, keepusing(*combo) assert(3) ///
	keep(3) nogen
* Total enrolment across all arms of the study.
egen enroll = rowtotal(no_randomised*)
* Indicator: none of the seven arm slots has sponsor equal to 1.
gen unsponsored_trial = sponsor1!=1 & sponsor2!=1 & sponsor3!=1 & sponsor4!=1 & sponsor5!=1 & sponsor6!=1 & sponsor7!=1
* Sequence the studies within a drug combination by year. Sorting on year
* alone leaves studies that share a year in an arbitrary order, so study_no
* and studyname are added as tie-breakers to make the sequence reproducible.
bys drug_combo_no (year study_no studyname): gen order=_n

order studyname study_no sample drug1 no_randomised1 sponsor1 drug2 no_randomised2 sponsor2 ///
	drug3 no_randomised3 sponsor3 drug4 no_randomised4 sponsor4 drug5 no_randomised5 sponsor5 ///
	drug6 no_randomised6 sponsor6 drug7 no_randomised7 sponsor7
save ../derived/Combined/combined_arm_level_wide.dta, replace


***********
* Combined Dataset at Drug-Pair Level
*********

* Aside: rationale for weights, since some arms are now used many times in pairs.
* Tabulate the number of non-missing drug slots (out of 7) per study.
use "../derived/Combined/combined_arm_level_wide.dta", clear
egen num_drugs = rowmiss(drug1 drug2 drug3 drug4 drug5 drug6 drug7)
replace num_drugs = 7 - num_drugs
tab num_drugs
			
* Start over: remove any pair file left from an earlier run, then rebuild the
* wide file with num_drugs attached as the input for the pair loop below.
clear
cap erase ../derived/Combined/pair_regs.dta
* Number of arm slots in the wide file; also the upper bound of both loops.
local num_drugs 7
use "../derived/Combined/combined_arm_level_wide.dta", clear
egen num_drugs = rowmiss(drug1 drug2 drug3 drug4 drug5 drug6 drug7)
replace num_drugs = 7 - num_drugs
save ../derived/Combined/pair_regs_initial.dta, replace
* Visit every unordered pair of arm slots (n < j). For each pair, load the
* study-level variables plus the two slots' arm-level variables, rename the
* slots to 1 and 2, and accumulate the rows in pair_regs.dta.
forvalues n=1/`num_drugs' {
	forvalues j = 1/`num_drugs' {
		if (`n'<`j'){	
			* NOTE(review): scale, recruit and variation are not kept under
			* those exact names when the wide file is built, so they appear
			* to rely on variable-name abbreviation -- confirm.
			use drug`n' sponsor`n' drug`j' sponsor`j' anti_sample study_no linked ///
				scale standard_scale enroll order recruit *_group year stdz_outcome_all`n' ////
				year unsponsored_trial stdz_outcome_relative_all`n' stdz_outcome_relative_all`j' ///
				stdz_outcome_all`j' studyname num_drugs variation lengthoftrial ///
				no_randomised`n' no_randomised`j' mean_age`n' mean_age`j' ///
				share_female`n' share_female`j' percent_female`n' percent_female`j' ///
				dose_min`n' dose_min`j' unique_id`n' unique_id`j' ////
				dropout_share`n' dropout_share`j' stdz_baseline`n' stdz_baseline`j' ///
				sig5_outcome`n' sig5_outcome`j' sig10_outcome`n' sig10_outcome`j' ///
				efficacy_change_PANSS_mean`n' efficacy_change_PANSS_mean`j' ///
				share_respond`n' share_respond`j' best_outcome`n' best_outcome`j' ///
				z_outcome`n' z_outcome`j' sponsor_remove_coi`n' sponsor_remove_coi`j' approve_year`n' ///
				approve_year`j' patent_year`n' patent_year`j' no_random_group`n' no_random_group`j' ///
				effect_perc`n' effect_perc`j' ///
				effect`n' effect`j' arm_no`n' arm_no`j' firm`n' firm`j' drug_type`n' drug_type`j' using ///
				"../derived/Combined/pair_regs_initial.dta", clear
			
			* Rename slot n to position 1 and slot j to position 2.
			foreach var in drug sponsor stdz_outcome_all stdz_outcome_relative_all ///
				no_randomised mean_age share_female percent_female dose_min ///
				dropout_share unique_id firm drug_type patent_year ///
				stdz_baseline sig5_outcome sig10_outcome efficacy_change_PANSS_mean ///
				share_respond best_outcome z_outcome sponsor_remove_coi approve_year no_random_group ///
				effect_perc effect arm_no {
				rename `var'`n' `var'1
				rename `var'`j' `var'2
			}
			* Keep only studies that have a drug in both slots.
			drop if drug1=="" | drug2==""
			* The first pair (1,2) starts the file; every later pair is
			* stacked on top of the pairs accumulated so far.
			if !(`n'==1 & `j'==2){
				append using ../derived/Combined/pair_regs.dta
			}
			save ../derived/Combined/pair_regs.dta, replace
		}
	}
}
* The data in memory after the last iteration hold all accumulated pairs.
save ../derived/Combined/combined_pair_level_wide.dta, replace
count

* Build the pair-level file: classify each drug pair, compute outcomes
* relative to the within-pair reference arm, and reshape to two rows per pair.
use ../derived/Combined/combined_pair_level_wide.dta, clear
gen pair_trial_no =_n
egen drug_pair_no = group(drug1 drug2)
** New cut unpublished
* Mean of sponsor for each position within a drug pair, computed only over
* rows with a recorded year; rows without a year get a missing mean.
bys drug_pair_no: egen mean_sponsor1 = mean(sponsor1) if year!=.
bys drug_pair_no: egen mean_sponsor2 = mean(sponsor2) if year!=.
* A pair shows variation when either mean is neither 0, nor 1, nor missing.
gen variation_pair = (mean_sponsor1 !=1 & mean_sponsor1!=0 & mean_sponsor1!=.) | (mean_sponsor2 !=1 & mean_sponsor2!=0 & mean_sponsor2!=.)

gen active_placebo_pair = drug2=="placebo"
gen same_type_pair = drug_type1==drug_type2
gen same_firm_pair = firm1 == firm2
* The placebo exclusion must apply to the whole sponsorship condition. Since
* & is evaluated before |, the unparenthesised form A | B & C is read as
* A | (B & C), which would flag placebo pairs whenever the first condition
* holds. The explicit parentheses restrict both flags to non-placebo pairs.
gen active_active_never_pair = ((mean_sponsor1 ==0) | (mean_sponsor2 ==0)) & drug2!="placebo"
gen active_active_always_pair = ((mean_sponsor1 ==1) | (mean_sponsor2 ==1)) & drug2!="placebo"
* NOTE(review): unlike variation_pair, this flag does not exclude missing
* means, so a pair with missing mean_sponsor values is flagged -- confirm.
gen active_active_both_pair = (mean_sponsor1 !=1 & mean_sponsor1!=0) &  ///
	(mean_sponsor2 !=1 & mean_sponsor2!=0)& drug2!="placebo"

* Standardised outcome relative to the reference arm: the smaller of the two
* outcomes, or the placebo arm whenever drug2 is placebo.
egen temp=rowmin(stdz_outcome_all1 stdz_outcome_all2)
replace temp = stdz_outcome_all2 if drug2=="placebo"
gen new_relative1=stdz_outcome_all1 - temp
gen new_relative2=stdz_outcome_all2 - temp
drop temp


* Same construction for the percent effect.
egen temp=rowmin(effect_perc1 effect_perc2)
replace temp = effect_perc2 if drug2=="placebo"
gen new_relative_effect_perc1=effect_perc1 - temp
gen new_relative_effect_perc2=effect_perc2 - temp
drop temp


* Two rows per pair, indexed by num (position 1 or 2 within the pair).
* NOTE(review): no new_relative_effect1 or new_relative_effect2 variable is
* created in this section; confirm the new_relative_effect stub is intended.
reshape long drug sponsor sponsor_remove_coi stdz_outcome_all new_relative ///
	new_relative_effect_perc new_relative_effect stdz_outcome_relative_all unique_id effect effect_perc ///
	no_randomised mean_age share_female percent_female dose_min dropout_share stdz_baseline mean_sponsor ///
	sig5_outcome sig10_outcome efficacy_change_PANSS_mean approve_year patent_year no_random_group ///
	share_respond best_outcome z_outcome arm_no firm drug_type, ///
	i(pair_trial_no studyname study_no anti_sample unsponsored_trial) j(num)

egen factor_pair = group(drug_pair_no drug)
egen cluster_group = group(studyname anti_sample)
* Each arm of a study with num_drugs arms appears in num_drugs-1 pairs, so
* every appearance receives weight 1/(num_drugs-1).
gen weight = 1/(num_drugs-1)

gen rel_yr = year - approve_year
egen rel_yr_group = group(rel_yr), mi
save ../derived/Combined/combined_pair_level.dta, replace

** Remove the intermediate files written above. capture keeps the script
** running when a file is already absent.
foreach scratch_file in ///
	../derived/Combined/antidepressant_subset.dta ///
	../derived/Combined/antipsychotic_subset.dta ///
	../derived/Combined/pair_regs.dta ///
	../derived/Combined/pair_regs_initial.dta ///
	../derived/Combined/variation_combo.dta ///
	../derived/Combined/variation_drug.dta ///
	../derived/Combined/combined_pair_level_wide.dta {
	capture erase `scratch_file'
}